/*
	Purpose: Using Abramitzky, Boustan, and Eriksson approach
	         (based on Goldenweiser 1916), calculate average 
	         net earnings for farmers at various levels and 
	         adjust for 1900 owner share. 
	Output: "CensusAg1900_farmerincome" dta files at various levels
	         (_byrace_bysouth and _byrace_byregion), saved under ./output
*/

* Start from an empty session and work out of the 1900 income-score folder
clear all
set more off
cd "$Mydirectory1/1_DataSources/1900_IncomeScores/"

** Load the farmer_income sheet built from tables 13 and 14 of the Census of Agriculture
	//Note: The source is https://www2.census.gov/library/publications/decennial/1900/volume-5/volume-5-p5.pdf
	import excel "./input/table13_14_analysis.xlsx", sheet("farmer_income") firstrow allstring clear

	* Remove rows whose region is "USA" (presumably the national total row; confirm against the sheet)
	drop if region=="USA"

	* Every column arrives as text because of allstring; convert the columns used below to numeric
	local numeric_cols race farmers farmvalue buildings equipment value_notlivestock labor fertilizer owners partowners ownerstenants
	destring `numeric_cols', replace
	
**********************
*** OWNERS
**********************

* Fraction of farmers counted in any of the three owner columns
	generate fracowners = (owners+ partowners +ownerstenants)/farmers

**********************
*** INCOME
**********************

/* Method follows Abramitzky, Boustan, and Eriksson
   (http://www.nber.org/papers/w15684.pdf, Appendix Table A1),
   which is based on Goldenweiser 1916. */

* Value of farm products not fed to livestock, per farmer
	generate value = value_notlivestock / farmers

* Imputed house rent plus food and fuel produced on the farm and consumed by
* the family: 26.6 percent of the farm output value
	generate foodfuel = value * .266

* Gross income = marketed output + imputed own consumption
	generate income = value + foodfuel

**********************
*** EXPENSES
**********************

* Per-farmer averages of the four itemized cost inputs
* (equipment, buildings, labor, fertilizer)
	local cost_totals   equipment buildings labor fertilizer
	local cost_averages equip_avg build_avg labor_avg fert_avg
	forvalues k = 1/4 {
		local total_k   : word `k' of `cost_totals'
		local average_k : word `k' of `cost_averages'
		generate `average_k' = `total_k' / farmers
	}

* Taxes: 0.6 percent of the total value of the farm
	generate totval_avg = farmvalue / farmers
	generate taxes = totval_avg * 0.006

* Depreciation: 5 percent on buildings, 15 percent on machinery
	generate dep_build = build_avg * 0.05
	generate dep_equip = equip_avg * 0.15

* Goldenweiser reports that feed, seed, and threshing expenses are 77 percent
* of the expenses on labor + fertilizer
	generate lplusf = labor_avg + fert_avg
	generate feedseedthresh = 0.77 * lplusf

* Total expenses
	generate expenses = (labor_avg + fert_avg + feedseedthresh) + taxes + dep_build + dep_equip

**********************
*** NET EARNINGS
**********************

	generate netearn = income - expenses

* Convert from 1899 dollars into 1950 dollars using CPI values 72.3 (1950) and 25 (1899)
	//Note: Source is https://www.minneapolisfed.org/community/financial-and-economic-education/cpi-calculator-information/consumer-price-index-1800
	* float() keeps 72.3 at storage-float precision, matching a value held in a float variable
	generate netearn00 = netearn * (float(72.3)/25)
	label var netearn00 "Net 1900 earnings, in 1950$"

* Owner-share adjustment: sharecropping contracts regularly split 50-50, so
* owners keep full net earnings and all non-owners are given 50 percent
	generate netearn00_adj = netearn00*(fracowners) + .5*netearn00 *(1-fracowners)
	label var netearn00_adj "Net 1900 earnings, in 1950$, adjusted for 1900 owner share"
	
**********************
*** COLLAPSE TO MULTIPLE LEVELS
**********************		

* Keep only the two earnings measures, the grouping columns, and the farmer
* counts that serve as weights in every collapse below
	keep netearn00 netearn00_adj region race farmers

* South indicator: 1 for the South Atlantic and South Central regions, 0 otherwise
	gen south_merge = region == "South Atlantic" | region == "South Central"
	
* Four-way region key: 1 North Atlantic, 2 North Central, 3 South, 4 Western.
* Any other region value is left missing.
	gen region_merge = 1 if region=="North Atlantic"
	replace region_merge=2 if region=="North Central"
	* Use the full variable name so this does not depend on variable abbreviation
	replace region_merge=3 if south_merge==1
	replace region_merge=4 if region=="Western"
	
* Constant occupation code on every row (presumably the farmer code used by
* the downstream merge; confirm against the consuming files)
	gen fatheroccej=81
	
* Snapshot of the row-level data, reloaded later for the race x region collapse
	tempfile fulldata
	save `fulldata'
	
* Each section below collapses to farmer-weighted means of the earnings
* measures at one grouping level, tags the results with a level suffix, and
* stores them in a tempfile. preserve/restore brings back the row-level data
* after each collapse. Grouping variables are written with their full names
* (south_merge) so the code does not depend on variable abbreviation.

*1. By occupation (only 1)

preserve
	collapse (mean) netearn00* [aw=farmers], by(fatheroccej)
	
	foreach var of varlist netearn00* {
	rename `var' `var'_byocc
	}
	
	tempfile byocc
	save `byocc'
	
restore 

*2. By race 

preserve
	collapse (mean) netearn00* [aw=farmers], by(race)
	
	foreach var of varlist netearn00* {
	rename `var' `var'_byrace
	}
	
	tempfile byrace
	save `byrace'
	
restore 

*3. By race by south 	
	
preserve 

	* fatheroccej is constant, so its mean carries the code through as a merge key
	collapse (mean) netearn00* fatheroccej [aw=farmers], by(race south_merge)
	
	foreach var of varlist netearn00* {
	rename `var' `var'_byr_bys
	}
	
	tempfile byrace_bysouth
	save `byrace_bysouth'
	
restore 

* 4. By south 

preserve 

	collapse (mean) netearn00* fatheroccej [aw=farmers], by(south_merge)
	
	foreach var of varlist netearn00* {
	rename `var' `var'_bysouth
	}
	
	tempfile bysouth
	save `bysouth'
	
restore 


* Merge 
* Start from the race x south cells and attach the coarser-level means.
* assert(3) stops the run unless every observation matches in both files.
* The south key is written with its full name (south_merge) so the merge,
* keep, and order commands do not depend on variable abbreviation.
	
	use `byrace_bysouth', clear 
	merge m:1 race using `byrace', assert(3) nogen
	merge m:1 fatheroccej using `byocc', assert(3) nogen
	merge m:1 south_merge using `bysouth', assert(3) nogen

	* Keep only adjusted measures
	keep fatheroccej race south_merge  *adj*
	order fatheroccej south_merge , before(race)
	
	label var netearn00_adj_byocc "Net 1900 earnings, in 1950$, adjusted for 1900 owner share, occ"
	label var netearn00_adj_byrace "Net 1900 earnings, in 1950$, adjusted for 1900 owner share, occ x race"
	label var netearn00_adj_byr_bys "Net 1900 earnings, in 1950$, adjusted for 1900 owner share, occ x race x south"
	label var netearn00_adj_bysouth "Net 1900 earnings, in 1950$, adjusted for 1900 owner share, occ x south"

	compress 
	save "./output/CensusAg1900_farmerincome_byrace_bysouth.dta", replace
	
*5. By race by region  	
* Reload the row-level snapshot and collapse to farmer-weighted means within
* race x region_merge cells.
	
	use `fulldata', clear 

	* south_merge is written out in full so the varlist does not depend on
	* variable abbreviation; it is dropped again by the keep below
	collapse (mean) netearn00* south_merge fatheroccej [aw=farmers], by(race region_merge)
	
	foreach var of varlist netearn00* {
	rename `var' `var'_byr_byreg
	}
	
	* Keep only adjusted measures
	keep fatheroccej race region_merge *adj*
	order fatheroccej region_merge, before(race)
	
	label var netearn00_adj_byr_byreg "Net 1900 earnings, in 1950$, adjusted for 1900 owner share, occ x race x region"
	
	compress 
	save "./output/CensusAg1900_farmerincome_byrace_byregion.dta", replace

	
